// cd /projects/data_commons/cw_code/
// qstata cw_ind_3_fk_naics12.do &

// Directory layout: project root, plus the crosswalk (cw) and LBD sub-folders
// that the rest of this script reads from and writes to.
global dir_proj "/projects/data_commons/"
global dir_cw   "${dir_proj}/cw/"
global dir_lbd  "${dir_proj}/lbd/"

// Time-stamp the start of the run in the log.
display "Started at $S_DATE $S_TIME"

//==============================================================================
// Take a look at how the different versions of fk_naics differ

// Load the LBD summary that carries the competing fk codes (fk02, fk07, fk12).
use "${dir_lbd}/lbd_raw_sum_fk_comp.dta", clear

// For each fk vintage, look its code up in the fk -> industry crosswalk and
// keep the resulting industry as ch_ind_<vintage>.
foreach iver in 02 07 {
	// The crosswalk is keyed on fk_naics, so expose this vintage under that name.
	gen fk_naics = fk`iver'
	// Keep every master row; m_fk<vintage> records whether a crosswalk match was found.
	merge m:1 fk_naics using ${dir_cw}/cw_ind_fk_imp, keep(match master) gen(m_fk`iver')
	// Inspect the codes that failed to match. On those rows ch_ind (assumed to
	// come from the crosswalk only) is missing, and a two-way tabulate drops
	// rows with missing values unless `missing' is given -- without it this
	// diagnostic would report no observations.
	tab fk_naics ch_ind if m_fk`iver' != 3, missing
	drop fk_naics
	rename ch_ind ch_ind_`iver'
}

// Employment by year x fk12 x industry (industry taken from the fk07 mapping).
collapse (sum) emp, by(year fk12 ch_ind_07)
// One row per industry within a year-fk12 cell; summed below to count industries.
gen n_ind = 1
// The by-prefix relies on collapse leaving the data sorted by its by() variables.
by year fk12: egen emp_max = max(emp)
// Industry code of the row(s) holding the largest employment in the cell.
gen ch_ind_07_maxi = ch_ind_07 if emp == emp_max
// Spread that code to every row of the cell. min() is used instead of mean():
// the two agree when a single industry holds the maximum, but when several
// industries tie, mean() would average their codes into a value that is not
// an industry at all, whereas min() deterministically picks one of them.
by year fk12: egen ch_ind_07_max = min(ch_ind_07_maxi)

// One row per year-fk12: number of industries, total employment, and the
// employment and code of the dominant industry.
collapse (sum) n_ind emp (max) emp_max2=emp (first) emp_max ch_ind_07_max, by(year fk12)
// Sanity check: the max recomputed by collapse should equal the egen max (expect 0).
count if emp_max != emp_max2

keep if inlist(year, 2013, 2014)
// How many industries does an fk12 code map to -- overall, and among multi-industry codes.
sum n_ind
sum n_ind if n_ind > 1

//------------------------------------------------------------------------------
// For fk mapped to multiple ind, share of emp in the largest ind

// Fraction of each year-fk12 cell's employment that sits in its largest industry.
generate emps_max = emp_max / emp

// Distribution of that fraction across cells spanning more than one industry:
// first unweighted, then weighted by cell employment.
summarize emps_max if n_ind > 1, detail
summarize emps_max if n_ind > 1 [aweight = emp], detail

//------------------------------------------------------------------------------
// Show share of employment associated with fk linked to multiple ind

// f_thld <threshold>
//   Splits the year-fk12 cells in memory into those whose largest-industry
//   employment share (emps_max) is at least <threshold> and the rest, then
//   lists, by year and group, each group's employment, its employment outside
//   the largest industry, and both expressed as shares of the year's total.
//   Works on a preserved copy, so the data in memory are left untouched.
capture program drop f_thld
program define f_thld
	args thld

	// Echo the threshold so each listing in the log is identifiable.
	display "`thld'"
	preserve
	// Employment that would be misclassified by assigning the whole cell
	// to its largest industry.
	generate emp_miss = emp - emp_max
	// Flag is 1 at or above the threshold and missing otherwise; the missing
	// value forms its own by-group in the collapse below.
	generate emps_max_thld = cond(emps_max >= `thld', 1, .)
	collapse (sum) emp emp_miss, by(year emps_max_thld)
	// Yearly employment total, used as the denominator for both shares.
	by year: egen emp_tot = total(emp)
	generate emps = emp / emp_tot
	generate emps_miss = emp_miss / emp_tot
	list
	restore
end

// Report the employment split at the 99% and 95% dominance thresholds.
foreach thld in 0.99 0.95 {
	f_thld `thld'
}

//------------------------------------------------------------------------------
// Assign ch_ind_07_max to fk_naics12

// NOTE: the variable is referred to by its full name, ch_ind_07_max, until the
// final rename. No variable called ch_ind exists before that point, so the
// short form only worked through variable-name abbreviation, which fails under
// `set varabbrev off' or as soon as another ch_ind* variable is present.

// Count in how many of the retained years each fk12 -> industry pairing occurs.
sort fk12 ch_ind_07_max year
by fk12 ch_ind_07_max: gen n_row = _N
tab n_row

// Pairings seen in a single year only.
list if n_row == 1

// Use Industry in 2014
keep if year == 2014
keep fk12 ch_ind_07_max
duplicates drop
// The crosswalk must map each fk12 code to exactly one industry.
isid fk12

// Store the code as double and snap it to one decimal place
// (presumably to strip float representation noise -- confirm).
recast double ch_ind_07_max
replace ch_ind_07_max = round(ch_ind_07_max, 0.1)

// Publish under the same variable names the fk -> industry crosswalk uses.
rename (fk12 ch_ind_07_max) (fk_naics ch_ind)
save ${dir_cw}/cw_ind_fk_imp_fk12, replace
saveold ${dir_cw}/cw_ind_fk_imp_fk12_v12, replace v(12)
export delimited ${dir_cw}/cw_ind_fk_imp_fk12.csv, replace

di "Ended at $S_DATE $S_TIME"
// End of do file
